Load the libraries
> list.of.packages <- c("xlsx", "kableExtra", "dplyr", "ggplot2", "egg", "cowplot",
+ "patchwork", "gridExtra", "UsingR", "car", "lattice", "ggpubr", "ggbreak", "GGally",
+ "reshape2", "ggcorrplot", "corrplot", "rela", "ggrepel", "factoextra", "chemometrics",
+ "sparsepca", "vegan", "ca", "ade4", "pls", "OmicsPLS")
> new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] ## check for packages not installed
> if (length(new.packages)) install.packages(new.packages) ## install packages if necessary
> res <- unlist(lapply(list.of.packages, require, character.only = T)) ## load packages needed for the session
> if (any(res == F)) {
+ list.of.packages[which(res == F)] ## show those package if you have troubles to install.
+ }
>
> list.of.bioc.packages <- c("phyloseq", "pcaMethods", "ropls", "mixOmics")
> new.packages.bioc <- list.of.packages[!(list.of.bioc.packages %in% installed.packages()[,
+ "Package"])]
> if (length(new.packages.bioc)) BiocManager::install(new.packages.bioc)
> res.bioc <- unlist(lapply(list.of.bioc.packages, require, character.only = T))
> if (any(res.bioc == F)) {
+ list.of.packages[which(res.bioc == F)] ## show those package if you have troubles to install.
+ }Functions to use
> resumen <- function(x) {
+ c(round(mean(x), 4), round(sd(x), 4))
+ } # Function that help us to summarize the data in mean and sd values
> lowerFn <- function(data, mapping, method = "lm", ...) {
+ p <- ggplot(data = data, mapping = mapping) + geom_point(colour = "blue") + geom_smooth(method = method,
+ color = "red", ...)
+ p
+ }
>
>
> ## PlotPCA for different methods.
> plotPCA_methods <- function(X.list, factor, title1, title2, size = 1.5, glineas = 0.25,
+ labels.scores, labels.loads) {
+
+ X.scores <- X.list$scores
+ X.loadings <- X.list$loadings
+ colnames(X.scores) <- paste0("PC", 1:dim(X.scores)[2])
+
+ colnames(X.loadings) <- paste0("PC", 1:dim(X.loadings)[2])
+ varianza <- as.data.frame(X.list$var)
+ varianza$PC <- paste0("PC", 1:length(X.list$var))
+ colnames(varianza) <- c("varianza", "PC")
+ # scores
+ X.scores <- as.data.frame(X.scores)
+ Group <- factor[[1]]
+ Group2 <- factor[[2]]
+ colores <- 1:length(levels(Group))
+ # main plot
+ p1 <- ggplot(X.scores, aes(x = PC1, y = PC2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.55, size = 3) + coord_cartesian(xlim = c(min(X.scores[,
+ 1]), max(X.scores[, 1]))) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+ grafico <- p1 + geom_text_repel(aes(y = PC2, label = labels.scores), segment.size = 0.25,
+ size = size) + labs(x = c(paste("PC1", round(varianza[1, 1], 2), "%")), y = c(paste("PC2",
+ round(varianza[2, 1], 2), "%"))) + ggtitle(paste("Principal Component Analysis for: ",
+ title1, sep = " ")) + theme(plot.title = element_text(hjust = 0.5)) + scale_color_manual(values = colores)
+
+ X.loadings <- as.data.frame(X.loadings)
+ # main plot
+ p2 <- ggplot(X.loadings, aes(x = PC1, y = PC2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = "gray70"),
+ alpha = 0.55, size = 3) + coord_cartesian(xlim = c(min(X.loadings[, 1]),
+ max(X.loadings[, 1])))
+ # avoiding labels superposition
+
+ grafico2 <- p2 + geom_text_repel(aes(label = labels.loads), segment.size = 0.25,
+ size = size) + labs(x = c(paste("PC1", round(varianza[1, 1], 2), "%")), y = c(paste("PC2",
+ round(varianza[2, 1], 2), "%"))) + ggtitle(paste("Principal Component Analysis for: ",
+ title2, sep = " ")) + theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position = "none")
+
+ graficos <- list(grafico, grafico2)
+ return(graficos)
+ }
>
> proces_bacteria <- function(datos, tipo) {
+
+ Order <- bacteria_phylum.abs$Order
+ variables_in_bacteria <- general_data[general_data$Orden %in% Order, ]
+ Sample <- variables_in_bacteria$Paciente
+
+
+ if (tipo == "genera") {
+
+ Order <- Order
+
+ X <- datos[-1, -1]
+ X <- apply(X, 2, as.numeric)
+ colnames(X) <- datos[1, 2:ncol(datos)]
+ rownames(X) <- Sample
+ X.rel <- 100 * X/rowSums(X)
+
+
+ } else if (tipo == "phylum") {
+
+ X <- datos[, -c(1:2)]
+ colnames(X) <- colnames(datos)[3:ncol(datos)]
+ rownames(X) <- Sample
+ X.rel <- 100 * X/rowSums(X)
+ }
+
+ GROUP <- variables_in_bacteria$GROUP
+ SEX <- variables_in_bacteria$SEX
+ OBESE <- variables_in_bacteria$OBESE
+
+ X <- as.data.frame(X)
+ X.rel <- as.data.frame(X.rel)
+
+ X$GROUP <- GROUP
+ X$SEX <- SEX
+ X$OBESE <- OBESE
+
+
+ X.rel$GROUP <- GROUP
+ X.rel$SEX <- SEX
+ X.rel$OBESE <- OBESE
+
+ return(list(abs = X, rel = X.rel))
+
+ }
>
>
> ## Creamos los objetos de phyloseq
>
> create_phyloseq <- function(datos, taxon) {
+
+ GROUP <- variables_in_bacteria$GROUP
+ SEX <- variables_in_bacteria$SEX
+ OBESE <- variables_in_bacteria$OBESE
+ datos <- datos
+
+ X <- datos[, -c(ncol(datos) - 2, ncol(datos) - 1, ncol(datos))]
+
+ Xt <- t(X)
+
+ rownames(Xt) <- paste0("OTU", 1:nrow(Xt))
+ OTU <- otu_table(Xt, taxa_are_rows = T) # creamos Otu table.
+
+ tax <- as.matrix(colnames(X), ncol = 1)
+ TAX <- tax_table(tax)
+ taxa_names(TAX) <- paste0("OTU", 1:ncol(X))
+ colnames(TAX) <- taxon
+
+
+ ## Creamos sample table
+ samples <- as.data.frame(matrix(rep(0, dim(X)[1] * 3), nrow = nrow(X), ncol = 3))
+ samples[, 1] <- GROUP
+ samples[, 2] <- OBESE
+ samples[, 3] <- SEX
+ colnames(samples) <- c("GROUP", "OBESE", "SEX")
+ rownames(samples) <- variables_in_bacteria$Paciente
+ SAM <- sample_data(samples)
+
+ phyloseq_obj <- phyloseq(OTU, TAX, SAM)
+
+ return(phyloseq_obj)
+
+ }
>
> perf <- function(X, X.pred) {
+ # print(dim(X)) print(dim(X.pred))
+ return(abs(norm(X - X.pred, type = "F"))/norm(X, type = "F"))
+
+ }
>
> spca.perf <- function(X, fix_parameter, transpose, scale.) {
+
+ parameter <- seq(1e-10, 1, 0.01)
+ perf.vector <- vector(mode = "numeric", length = length(parameter))
+ if (fix_parameter == "alpha") {
+
+ beta <- parameter
+ alpha <- 1e-04
+ if (transpose == F) {
+
+ for (i in 1:length(parameter)) {
+ perf.vector[i] <- perf(as.matrix((X)), t(spca((X), alpha = alpha,
+ beta = beta[i], center = T, scale = scale., verbose = F, k = ncol(X))$loadings))
+
+ }
+ beta <- beta[which.min(perf.vector)]
+ pcx <- spca(X, alpha = alpha, beta = beta, verbose = F, center = T, scale = scale.,
+ k = ncol(X))
+
+ } else if (transpose == T) {
+
+ for (i in 1:length(parameter)) {
+ perf.vector[i] <- perf(as.matrix(t(X)), (spca(t(X), alpha = alpha,
+ beta = beta[i], verbose = F, center = T, scale = scale., k = ncol(X))$scores))
+
+ }
+ beta <- beta[which.min(perf.vector)]
+ pcx <- spca(t(X), alpha = alpha, beta = beta, verbose = F, center = T,
+ scale = scale., k = ncol(X))
+ }
+
+ } else if (fix_parameter == "beta") {
+
+ alpha <- parameter
+ beta <- 1e-04
+ if (transpose == F) {
+
+ for (i in 1:length(parameter)) {
+ perf.vector[i] <- perf(as.matrix((X)), t(spca((X), alpha = alpha[i],
+ beta = beta, center = T, scale = scale., verbose = F, k = ncol(X))$loadings))
+
+ }
+ alpha <- alpha[which.min(perf.vector)]
+ pcx <- spca(X, alpha = alpha, beta = beta, verbose = F, center = T, scale = scale.,
+ k = ncol(X))
+ } else if (transpose == T) {
+
+ for (i in 1:length(parameter)) {
+ perf.vector[i] <- perf(as.matrix(t(X)), (spca(t(X), alpha = alpha[i],
+ center = T, scale = scale., beta = beta, verbose = F, k = ncol(X))$scores))
+
+ }
+
+ alpha <- alpha[which.min(perf.vector)]
+ pcx <- spca(t(X), alpha = alpha, beta = beta, center = T, scale = scale.,
+ verbose = F, k = ncol(X))
+ }
+
+
+ }
+
+ return(pcx)
+ }
>
> plotCA_methods <- function(ca.obj, factor, title, size = 1.5, glineas = 0.25, labels.bac,
+ labels.samples) {
+
+ X.cord_col_stand <- data.frame(ca.obj$colcoord)
+ X.cord_col_prin <- data.frame(ca.obj$colcoord %*% diag(ca.obj$sv))
+ X.cord_row_stand <- data.frame(ca.obj$rowcoord)
+ X.cord_row_prin <- data.frame(ca.obj$rowcoord %*% diag(ca.obj$sv))
+
+ colnames(X.cord_col_stand) <- paste0("dim", 1:dim(X.cord_col_stand)[2])
+ colnames(X.cord_col_prin) <- paste0("dim", 1:dim(X.cord_col_prin)[2])
+ colnames(X.cord_row_stand) <- paste0("dim", 1:dim(X.cord_row_stand)[2])
+ colnames(X.cord_row_prin) <- paste0("dim", 1:dim(X.cord_row_prin)[2])
+
+ data.plot.col <- as.data.frame(cbind(X.cord_col_stand[, 1], X.cord_col_prin[,
+ 2]))
+ colnames(data.plot.col) <- c("dim1", "dim2")
+
+ data.plot.row <- as.data.frame(cbind(X.cord_row_stand[, 1], X.cord_row_prin[,
+ 2]))
+ colnames(data.plot.row) <- c("dim1", "dim2")
+ inercia <- data.frame(100 * ca.obj$sv^2/sum(ca.obj$sv^2))
+ colnames(inercia) <- "Inercia"
+ inercia$DIM <- paste("cim", 1:length(ca.obj$sv))
+ Group <- factor[[1]]
+ Group2 <- factor[[2]]
+ colores <- 1:length(levels(Group))
+
+
+ # computamos plot asymétrico con las columnas (bacterias)
+ p1 <- ggplot(data.plot.col, aes(x = dim1, y = dim2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + coord_cartesian(xlim = c(min(data.plot.col[,
+ 1]), max(data.plot.col[, 1])))
+
+
+ # avoiding labels superposition
+
+ grafico_columnas <- p1 + geom_text_repel(aes(y = dim2, label = labels.bac), segment.size = 0.1,
+ size = size) + geom_point(aes(y = dim2)) + labs(x = c(paste("dim1", round(inercia[1,
+ 1], 2), "%")), y = c(paste("dim2", round(inercia[2, 1], 2), "%"))) + ggtitle(paste("Canonical Correlation Analysis: Columns asymetric plot for ",
+ title, sep = " ")) + theme(plot.title = element_text(hjust = 0.5))
+
+
+
+ # main plot
+ p2 <- ggplot(data.plot.row, aes(x = dim1, y = dim2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.55, size = 3) + scale_fill_discrete(name = "Group") +
+ coord_cartesian(xlim = c(min(data.plot.row[, 1]), max(data.plot.row[, 1])))
+ # avoiding labels superposition
+
+ grafico_filas <- p2 + geom_text_repel(aes(label = labels.samples), segment.size = 0.25,
+ size = size) + labs(x = c(paste("dim1", round(inercia[1, 1], 2), "%")), y = c(paste("dim2",
+ round(inercia[2, 1], 2), "%"))) + ggtitle(paste("Canonical Correlation Analysis: Rows asymetric plot for: ",
+ title, sep = " ")) + theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position = "right")
+
+ onjeto_cca <- list(plot_cols = grafico_columnas, plot_rows = grafico_filas, prin_rows = X.cord_row_prin,
+ stand_rows = X.cord_row_stand, prin_cols = X.cord_col_prin, stand_cols = X.cord_col_stand)
+ return(onjeto_cca)
+ }
>
>
>
>
> plotMDS <- function(mds.obj, factor, title, labels, method, size = 1.5, glineas = 0.25) {
+
+ Group <- factor[[1]]
+ Group2 <- factor[[2]]
+ colores <- 1:length(levels(Group))
+ if (method == "meta") {
+
+ X.points <- data.frame(mds.obj$points)
+ X.scores <- data.frame(mds.obj$species)
+
+ # main plot
+ p1 <- ggplot(X.points, aes(x = MDS1, y = MDS2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.55, size = 3) + coord_cartesian(xlim = c(min(c(X.points[,
+ 1], X.scores[, 1])), max(c(X.points[, 1], X.scores[, 1])))) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+ grafico <- p1 + geom_text_repel(aes(y = MDS2, label = labels), segment.size = 0.25,
+ size = size) + labs(y = c(paste("stress", round(100 * mds.obj$stress,
+ 2), "%"))) + ggtitle(paste("Non-metric MDS (meta) for ", title, sep = " ")) +
+ theme(plot.title = element_text(hjust = 0.5)) + scale_color_manual(values = colores)
+
+ grafico2 <- ggplot(data = X.scores, aes(MDS1, MDS2)) + geom_text_repel(data = X.scores,
+ aes(y = MDS2, label = rownames(X.scores)), segment.size = 0.25, size = 2) +
+ labs(y = c(paste("Total stress", 100 * round(mds.obj$stress, 2), "%"))) +
+ ggtitle(paste("Non-metric MDS (meta) for ", title, sep = " ")) + coord_cartesian(xlim = c(min(X.points[,
+ 1]), max(X.points[, 1]))) + theme(plot.title = element_text(hjust = 0.5))
+
+
+ resultado <- list(grafico1 = grafico, grafico2 = grafico2, dim1_samples = X.points[,
+ 1], dim2_samples = X.points[, 2], dim1_species = X.scores[, 1], dim2_species = X.scores[,
+ 2])
+
+
+
+ } else if (method == "iso") {
+ X.points <- data.frame(mds.obj$points)
+
+ colnames(X.points) <- paste0("MDS", 1:ncol(X.points))
+ colores <- 1:length(levels(Group))
+ # main plot
+ p1 <- ggplot(X.points, aes(x = MDS1, y = MDS2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.55, size = 3) + coord_cartesian(xlim = c(min(X.points[,
+ 1]), max(X.points[, 1]))) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+ grafico <- p1 + geom_text_repel(aes(y = MDS2, label = labels), segment.size = 0.25,
+ size = size) + labs(y = c(paste("Total stress", round(mds.obj$stress,
+ 2), "%"))) + ggtitle(paste("Non-metric MDS (iso) for ", title, sep = " ")) +
+ theme(plot.title = element_text(hjust = 0.5)) + scale_color_manual(values = colores)
+
+
+ resultado <- list(grafico1 = grafico, dim1_samples = X.points[, 1], dim2_samples = X.points[,
+ 2])
+ }
+
+
+ return(resultado)
+ }
>
>
> plotPcoA <- function(mds.obj, factor, title, labels, size = 1.5, glineas = 0.25) {
+
+ Group <- factor[[1]]
+ Group2 <- factor[[2]]
+ colores <- 1:length(levels(Group))
+ varianza <- 100 * round(mds.obj$eig/sum(mds.obj$eig), 2)
+ X.points <- data.frame(mds.obj$points)
+
+ colnames(X.points) <- paste0("PC", 1:ncol(X.points))
+ colores <- 1:length(levels(Group))
+ # main plot
+ p1 <- ggplot(X.points, aes(x = PC1, y = PC2)) + theme_classic() + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.55, size = 3) + coord_cartesian(xlim = c(min(X.points[,
+ 1]), max(X.points[, 1]))) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+ grafico <- p1 + geom_text_repel(aes(y = PC2, label = labels), segment.size = 0.25,
+ size = size) + labs(x = c(paste("PC1", round(varianza[1], 2), "%")), y = c(paste("PC2",
+ round(varianza[2], 2), "%"))) + ggtitle(paste("PCoA for ", title, sep = " ")) +
+ theme(plot.title = element_text(hjust = 0.5)) + scale_color_manual(values = colores)
+
+
+ resultado <- list(grafico1 = grafico, dim1_samples = X.points[, 1], dim2_samples = X.points[,
+ 2])
+
+
+
+ return(resultado)
+ }
>
>
>
> plot_O2PLS <- function(obj_pls, title, xcomp, ycomp, xorto, yorto, size = 1.5, glineas = 0.25) {
+ grupos <- list(variables_in_bacteria$GROUP, variables_in_bacteria$OBESE)
+ Group <- grupos[[1]]
+ Group2 <- grupos[[2]]
+
+ colores <- 1:length(levels(Group))
+ labels <- variables_in_bacteria$Paciente
+ X.joint <- as.data.frame(obj_pls$Tt)
+ colnames(X.joint) <- paste0("X_Joint", 1:ncol(X.joint))
+ compxjoint <- colnames(X.joint)
+ Y.joint <- as.data.frame(obj_pls$U)
+ colnames(Y.joint) <- paste0("Y_Joint", 1:ncol(Y.joint))
+ compyjoint <- colnames(Y.joint)
+ X.orto <- as.data.frame(obj_pls$T_Yosc)
+ colnames(X.orto) <- paste0("X_Ortho", 1:ncol(X.orto))
+ compxorto <- colnames(X.orto)
+ Y.orto <- as.data.frame(obj_pls$U_Xosc)
+ colnames(Y.orto) <- paste0("Y_Ortho", 1:ncol(Y.orto))
+ compyorto <- colnames(Y.orto)
+
+ ## Construimos los dataframespara los gráficos.
+
+ data_conjunto <- cbind(X.joint, Y.joint)
+ data_XYorto <- cbind(X.joint, Y.orto)
+ data_YXorto <- cbind(Y.joint, X.orto)
+ data_orto <- cbind(X.orto, Y.orto)
+ ## Grafico conjunto Primero las componentes conjuntas.
+
+ p1 <- ggplot(data_conjunto, aes_string(x = compxjoint[xcomp], y = compyjoint[ycomp])) +
+ theme(legend.position = "none") + geom_hline(yintercept = 0, color = "gray70") +
+ geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.5, size = 3) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+
+
+ p2 <- ggplot(data_XYorto, aes_string(x = compxjoint[xcomp], y = compyorto[yorto])) +
+ theme(legend.position = "none") + geom_hline(yintercept = 0, color = "gray70") +
+ geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.5, size = 3) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+
+ p3 <- ggplot(data_YXorto, aes_string(x = compyjoint[ycomp], y = compxorto[xorto])) +
+ theme(legend.position = "none") + geom_hline(yintercept = 0, color = "gray70") +
+ geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.5, size = 3) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+
+ p4 <- ggplot(data_orto, aes_string(x = compxorto[xorto], y = compyorto[yorto])) +
+ theme(legend.position = "none") + geom_hline(yintercept = 0, color = "gray70") +
+ geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.5, size = 3) + scale_fill_discrete(name = "Group")
+ # avoiding labels superposition
+
+
+ ggarrange(p1, p2, p3, p4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
+
+
+
+ }
>
> plotPLS2 <- function(pls_obj, title, labels, cmpx, cmpy, size = 1.5, glineas = 0.25) {
+ factor <- list(variables_in_bacteria$GROUP, variables_in_bacteria$OBESE)
+ X.scores <- as.data.frame(matrix(pls_obj$scores, ncol = ncol(pls_obj$scores),
+ nrow = nrow(pls_obj$scores)))
+ colnames(X.scores) <- paste0("PCx", 1:dim(X.scores)[2])
+ compxscores <- colnames(X.scores)
+ Y.scores <- as.data.frame(matrix(pls_obj$Yscores, ncol = ncol(pls_obj$Yscores),
+ nrow = nrow(pls_obj$Yscores)))
+ colnames(Y.scores) <- paste0("PCy", 1:dim(Y.scores)[2])
+ compyscores <- colnames(Y.scores)
+ data_conjunto <- cbind(X.scores, Y.scores)
+ varianza <- as.data.frame(100 * pls_obj$Xvar/sum(pls_obj$Xvar))
+
+ varianza$PC <- paste0("PC", 1:length(pls_obj$Xvar))
+ colnames(varianza) <- c("varianza", "PC")
+ colnames(X.scores) <- paste0("PC", 1:length(pls_obj$Xvar))
+ # scores
+ Group <- factor[[1]]
+ Group2 <- factor[[2]]
+ colores <- 1:length(levels(Group))
+ # main plot
+ p1 <- ggplot(data_conjunto, aes_string(x = compxscores[cmpx], y = compyscores[cmpy])) +
+ theme_classic() + theme(legend.position = "none") + geom_hline(yintercept = 0,
+ color = "gray70") + geom_vline(xintercept = 0, color = "gray70") + geom_point(aes(color = Group,
+ shape = Group2), alpha = 0.55, size = 3) + scale_fill_discrete(name = "Group") +
+ labs(y = title)
+ # avoiding labels superposition
+
+
+ return(p1)
+ }
>
>
>
> compare_pls <- function(X, Y, center, scale) {
+
+ ## Function PLS2 that compares X as independent or dependent variable.
+
+ pls_obj_xy <- plsr(X ~ Y, val.type = "CV", center = center, scale = scale)
+ pls_obj_yx <- plsr(Y ~ X, val.type = "CV", center = center, scale = scale)
+
+ pls_obj <- list(inde_micro = pls_obj_xy, inde_meta = pls_obj_yx)
+
+ return(pls_obj)
+
+ }
>
> modelos_o2pls <- function(X, Y) {
+
+ X <- scale(X)
+ Y <- scale(Y)
+ n <- 1
+
+ while (n <= 1) {
+ res_cv <- crossval_o2m_adjR2(X, Y, 1:4, 0:4, 0:4, nr_folds = 3)
+ res_cv_min_mse <- res_cv[which.min(res_cv[, 1]), ]
+ n <- as.numeric(res_cv_min_mse[2])
+
+ }
+
+ nx <- as.numeric(res_cv_min_mse[3])
+ ny <- as.numeric(res_cv_min_mse[4])
+
+ res <- o2m2(X = X, Y = Y, n = n, nx = nx, ny = ny)
+
+ return(res)
+
+
+ }Load the data
> general_data <- read.xlsx(file.path(params$folder.data, params$general.data), sheetIndex = 1)
> general_data$SEX <- factor(general_data$SEX, levels = c(0, 2), labels = c("Female",
+ "Male"))
> general_data$OBESE <- factor(general_data$OBESE, levels = c(0, 1), labels = c("No Obese",
+ "Obese"))
> general_data$GROUP <- factor(general_data$GROUP, levels = c(0, 1, 2), labels = c("Female",
+ "PCOS", "Male"))
>
> bacteria_phylum.abs <- as.data.frame(read.xlsx(file.path(params$folder.data, params$file.data),
+ sheetIndex = 1))
> bacteria_genera.abs <- as.data.frame(t(read.xlsx(file.path(params$folder.data, params$file.data),
+ sheetIndex = 3)))
>
> metabolome_file <- file.path(params$folder.data, file = params$metabolome.data) ## file path of the data
> metabolome.tmp <- read.xlsx(metabolome_file, sheetIndex = 1) # read the data
> any(is.na(metabolome.tmp)) # there is a row of missing values.[1] TRUE
> metabolome <- metabolome.tmp[-nrow(metabolome.tmp), ] ## remove the missing value row
> metabolome$GROUP <- general_data$GROUP ## add GROUP variable
> metabolome$OBESE <- general_data$OBESE ## add OBESE variables.Select those samples in common
> variables_in_bacteria <- general_data[general_data$Orden %in% bacteria_phylum.abs$Order,
+ ]The next samples not are in the metagenome dataset
> general_data[!general_data$Orden %in% bacteria_phylum.abs$Order, ][, 1:5] Orden Paciente SEX GROUP OBESE
1 1 MdGLP2 Female Female No Obese
22 22 PdGLP5 Female PCOS No Obese
25 25 PdGLP10 Female PCOS No Obese
36 36 VdGLP2 Male Male No Obese
42 42 VdGLP8 Male Male No Obese
51 51 VOGLP7 Male Male Obese
53 53 VOGLP10 Male Male Obese
Before start the analysis, let’s create phyloseq objects.
> phylum.list <- proces_bacteria(bacteria_phylum.abs, tipo = "phylum")
> genera.list <- proces_bacteria(bacteria_genera.abs, tipo = "genera")
>
> phylum.abs <- phylum.list$abs
> phylum.rel <- phylum.list$rel
> genera.abs <- genera.list$abs
> genera.rel <- genera.list$rel
>
> phyloseq_phyla_abs <- create_phyloseq(phylum.abs, "phylum")
> phyloseq_phyla_rel <- create_phyloseq(phylum.rel, "phylum")
>
> phyloseq_genera_abs <- create_phyloseq(genera.abs, "genus")
> phyloseq_genera_rel <- create_phyloseq(genera.rel, "genus")Before starting the integrative analysis, let’s exclude those samples, on the metabolome that are not in the microbiome. And exclude the postprandial variables on the metabolome
> metabolome_common <- metabolome[metabolome$Order %in% variables_in_bacteria$Orden,
+ ]Let’s consider only the basal data set of the metabolome
> X <- as.matrix(metabolome_common[, -c(1, 2, ncol(metabolome_common) - 1, ncol(metabolome_common))])
>
> X.post <- X[, grep("AU[GC]", colnames(X), invert = F)]
> X.basal_tot <- X[, grep("AU[GC]", colnames(X), invert = T)]
> X.mean_basal <- X.basal_tot[, grep("mean", colnames(X.basal_tot))]
> X.basal <- X.basal_tot[, grep("mean", colnames(X.basal_tot), invert = T)] # basal values of the metabolomeNow let’s extract the microbiome data, the genera and the phylum, relative and absolute abundance.
> Y.abs.p <- as.matrix(phylum.abs[, -c(ncol(phylum.abs) - 2, ncol(phylum.abs) - 1,
+ ncol(phylum.abs))])
> Y.abs.p <- Y.abs.p[, colSums(Y.abs.p) > 0] # absolute abundance phyla
> Y.rel.p <- as.matrix(phylum.rel[, -c(ncol(phylum.rel) - 2, ncol(phylum.rel) - 1,
+ ncol(phylum.rel))])
> Y.rel.p <- Y.rel.p[, colSums(Y.rel.p) > 0] # relative abundance phyla
> Y.abs.g <- as.matrix(genera.abs[, -c(ncol(genera.abs) - 2, ncol(genera.abs) - 1,
+ ncol(genera.abs))])
> Y.abs.g <- Y.abs.g[, colSums(Y.abs.g) > 0] # absolute abundance genera
> Y.rel.g <- as.matrix(genera.rel[, -c(ncol(genera.rel) - 2, ncol(genera.rel) - 1,
+ ncol(genera.rel))])
> Y.rel.g <- Y.rel.g[, colSums(Y.rel.g) > 0] # relative abundance generalNotes on integration microbiome-metabolome.
Firstly we have to take into account same samples across blocks. Data has to be standardized and mean centered. We are going to explore PLS2 and PLS2-O. On both methods, we are going to explore the dependence of the microbiome on the metabolome and viceversa.
PLS2 integration.
> lista_relaciones <- list(abs_p = list(meta = X.basal, micro = Y.abs.p), rel_p = list(meta = X.basal,
+ micro = Y.rel.p), abs_g = list(meta = X.basal, micro = Y.abs.g), rel_g = list(meta = X.basal,
+ micro = Y.rel.g))
>
>
>
> lista_pls_res <- vector(mode = "list", length = length(lista_relaciones))
> names(lista_pls_res) <- c("abs_p", "rel_p", "abs_g", "rel_g")
> for (rel in 1:length(lista_relaciones)) {
+
+ X <- as.matrix(lista_relaciones[[rel]]$meta)
+ Y <- as.matrix(lista_relaciones[[rel]]$micro)
+ lista_pls_res[[rel]] <- compare_pls(X, Y, center = T, scale = T)
+ }> grupos <- list(variables_in_bacteria$GROUP, variables_in_bacteria$OBESE) ##factors to be plotted
> lista_graficos_pls_inde_meta <- vector(mode = "list", length = 4)
> lista_graficos_pls_inde_micro <- vector(mode = "list", length = 4)
>
> for (res in 1:length(lista_pls_res)) {
+
+ inde_meta <- lista_pls_res[[res]]$inde_meta
+ inde_micro <- lista_pls_res[[res]]$inde_micro
+ lista_graficos_pls_inde_micro[[res]] <- plotPLS2(inde_micro, title = paste("independiente: microbioma",
+ names(lista_pls_res)[res]), labels = rownames(X), cmpx = 1, cmpy = 2)
+ lista_graficos_pls_inde_meta[[res]] <- plotPLS2(inde_meta, title = paste("independiente_metabolome",
+ names(lista_pls_res)[res]), labels = rownames(X), cmpx = 1, cmpy = 2)
+
+
+ }Graficos independiente microbioma.
> ggarrange(lista_graficos_pls_inde_micro[[1]], lista_graficos_pls_inde_micro[[2]],
+ lista_graficos_pls_inde_micro[[3]], lista_graficos_pls_inde_micro[[4]], common.legend = T)Graficos independiente metaboloma
> ggarrange(lista_graficos_pls_inde_meta[[1]], lista_graficos_pls_inde_meta[[2]], lista_graficos_pls_inde_meta[[3]],
+ lista_graficos_pls_inde_meta[[4]], common.legend = T)Summary results PLS2
The best output is with the independent variable the metabolome, with better results on the genus.
OPLS2
Firstly we have to determine the number of components, we are going to scale and center the data. Also we are going to explore the independence or dependence of the metabolome.
> lista_relaciones_2 <- list(meta = X.mean_basal, micro_phyla_abs = Y.abs.p, micro_phyla_rel = Y.rel.p,
+ micro_gen_abs = Y.abs.g, micro_gen_rel = Y.rel.g)
>
>
> res_o2m_scaleT <- vector(mode = "list", length = 4)
>
> names(res_o2m_scaleT) <- c("abs_phyla", "rel_phyla", "abs_gen", "rel_gen")
>
> for (m in 1:4) {
+
+
+ res_o2m_scaleT[[m]] <- modelos_o2pls(lista_relaciones_2$meta, lista_relaciones_2[[m +
+ 1]])
+
+
+
+ }> lista_graficos_scale_T <- vector(mode = "list", length = 4)
> names(lista_graficos_scale_T) <- c("abs_phyla", "rel_phyla", "abs_gen", "rel_gen")
>
>
> aux.fun <- function(x) {
+ if (length(x) > 1) {
+ return(which.max(x))
+ } else {
+ return(1)
+ }
+ }
>
> for (i in 1:4) {
+
+ varXjoint <- aux.fun(res_o2m_scaleT[[i]]$flags$varXjoint)
+ varYjoint <- aux.fun(res_o2m_scaleT[[i]]$flags$varYjoint)
+ varXorth <- aux.fun(res_o2m_scaleT[[i]]$flags$varXorth)
+ varYorth <- aux.fun(res_o2m_scaleT[[i]]$flags$varYorth)
+
+
+ lista_graficos_scale_T[[i]] <- plot_O2PLS(obj_pls = res_o2m_scaleT[[i]], title = names(res_o2m_scaleT)[i],
+ xcomp = 1, ycomp = 2, xorto = varXorth, yorto = varYorth)
+
+
+ }Vemos los gráficos escalados y sin escalar del filo
> lista_graficos_scale_T$abs_phyla> lista_graficos_scale_T$rel_phylagenero
> lista_graficos_scale_T$abs_gen> lista_graficos_scale_T$rel_genDIABLO
> rownames(X.basal) <- rownames(Y.abs.p)With GROUP variable as outcome.
Only phylum
absolute abundance
> outcome <- variables_in_bacteria$GROUP
>
> datos <- list(metabolomics = X.basal, filo_abs = Y.abs.p)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics filo_abs
metabolomics 0.0 0.1
filo_abs 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 3 3 3
Overall.BER 3 3 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")relative abundance
> outcome <- variables_in_bacteria$GROUP
>
> datos <- list(metabolomics = X.basal, filo_rel = Y.rel.p)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics filo_rel
metabolomics 0.0 0.1
filo_rel 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 3 3 3
Overall.BER 3 3 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")Only genera
absolute abundance
> outcome <- variables_in_bacteria$GROUP
>
> datos <- list(metabolomics = X.basal, genus_abs = Y.abs.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics genus_abs
metabolomics 0.0 0.1
genus_abs 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 4 2 2
Overall.BER 4 2 2
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")relative abundance
> outcome <- variables_in_bacteria$GROUP
>
> datos <- list(metabolomics = X.basal, genus_rel = Y.rel.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics genus_rel
metabolomics 0.0 0.1
genus_rel 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 1 8 10
Overall.BER 1 1 5
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }Both
absolute abundance
> outcome <- variables_in_bacteria$GROUP
>
> datos <- list(metabolomics = X.basal, abs_filo = Y.abs.p, abs_genus = Y.abs.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics abs_filo abs_genus
metabolomics 0.0 0.1 0.1
abs_filo 0.1 0.0 0.1
abs_genus 0.1 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 2 3 3
Overall.BER 2 3 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- variables_in_bacteria$GROUP
>
> datos <- list(metabolomics = X.basal, rel_filo = Y.rel.p, rel_genus = Y.rel.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics rel_filo rel_genus
metabolomics 0.0 0.1 0.1
rel_filo 0.1 0.0 0.1
rel_genus 0.1 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 3 2 3
Overall.BER 3 2 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }With OBESE variable as outcome.
Only phylum
absolute abundance
> outcome <- variables_in_bacteria$OBESE
>
> datos <- list(metabolomics = X.basal, filo_abs = Y.abs.p)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics filo_abs
metabolomics 0.0 0.1
filo_abs 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 2 2 2
Overall.BER 2 2 2
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- variables_in_bacteria$OBESE
>
> datos <- list(metabolomics = X.basal, filo_rel = Y.rel.p)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics filo_rel
metabolomics 0.0 0.1
filo_rel 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 3 2 3
Overall.BER 3 2 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }Only genera
absolute abundance
> outcome <- variables_in_bacteria$OBESE
>
> datos <- list(metabolomics = X.basal, genus_abs = Y.abs.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics genus_abs
metabolomics 0.0 0.1
genus_abs 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 2 2 2
Overall.BER 2 2 2
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- variables_in_bacteria$OBESE
>
> datos <- list(metabolomics = X.basal, genus_rel = Y.rel.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics genus_rel
metabolomics 0.0 0.1
genus_rel 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 1 1 1
Overall.BER 1 1 1
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }Both
absolute abundance
> outcome <- variables_in_bacteria$OBESE
>
> datos <- list(metabolomics = X.basal, abs_filo = Y.abs.p, abs_genus = Y.abs.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics abs_filo abs_genus
metabolomics 0.0 0.1 0.1
abs_filo 0.1 0.0 0.1
abs_genus 0.1 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 4 5 4
Overall.BER 4 5 4
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- variables_in_bacteria$OBESE
>
> datos <- list(metabolomics = X.basal, rel_filo = Y.rel.p, rel_genus = Y.rel.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics rel_filo rel_genus
metabolomics 0.0 0.1 0.1
rel_filo 0.1 0.0 0.1
rel_genus 0.1 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 2 2 2
Overall.BER 2 2 2
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }With OBESE and GROUP variables as outcome.
Only phylum
absolute abundance
> outcome <- with(variables_in_bacteria, interaction(GROUP, OBESE))
>
> datos <- list(metabolomics = X.basal, filo_abs = Y.abs.p)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics filo_abs
metabolomics 0.0 0.1
filo_abs 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 5 4 4
Overall.BER 5 4 4
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- with(variables_in_bacteria, interaction(GROUP, OBESE))
>
> datos <- list(metabolomics = X.basal, filo_rel = Y.rel.p)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics filo_rel
metabolomics 0.0 0.1
filo_rel 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 4 4 3
Overall.BER 4 4 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }Only genera
absolute abundance
> outcome <- with(variables_in_bacteria, interaction(GROUP, OBESE))
>
> datos <- list(metabolomics = X.basal, genus_abs = Y.abs.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics genus_abs
metabolomics 0.0 0.1
genus_abs 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 7 3 4
Overall.BER 7 4 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 2)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- with(variables_in_bacteria, interaction(GROUP, OBESE))
>
> datos <- list(metabolomics = X.basal, genus_rel = Y.rel.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics genus_rel
metabolomics 0.0 0.1
genus_rel 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 2 1 1
Overall.BER 2 1 1
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }Both
absolute abundance
> outcome <- with(variables_in_bacteria, interaction(GROUP, OBESE))
>
> datos <- list(metabolomics = X.basal, abs_filo = Y.abs.p, abs_genus = Y.abs.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics abs_filo abs_genus
metabolomics 0.0 0.1 0.1
abs_filo 0.1 0.0 0.1
abs_genus 0.1 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
>
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 3 3 3
Overall.BER 3 3 3
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }relative abundance
> outcome <- with(variables_in_bacteria, interaction(GROUP, OBESE))
>
> datos <- list(metabolomics = X.basal, rel_filo = Y.rel.p, rel_genus = Y.rel.g)
> design = matrix(0.1, ncol = length(datos), nrow = length(datos), dimnames = list(names(datos),
+ names(datos)))
> diag(design) = 0
>
> design metabolomics rel_filo rel_genus
metabolomics 0.0 0.1 0.1
rel_filo 0.1 0.0 0.1
rel_genus 0.1 0.1 0.0
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = 10, design = design)
> set.seed(123) # for reproducibility, only when the `cpus' argument is not used
> # this code takes a couple of min to run
> perf.diablo = mixOmics::perf(sgccda.res, validation = "Mfold", folds = 10, nrepeat = 10)
>
> # perf.diablo # lists the different outputs
> plot(perf.diablo)> perf.diablo$choice.ncomp$WeightedVote max.dist centroids.dist mahalanobis.dist
Overall.ER 2 1 7
Overall.BER 9 1 7
> ncomp = perf.diablo$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
>
> sgccda.res = block.splsda(X = datos, Y = outcome, ncomp = ncomp, design = design)> plotDiablo(sgccda.res, ncomp = 1)> if (!ncomp < 2) {
+
+ plotIndiv(sgccda.res, ind.names = FALSE, legend = TRUE, title = "DIABLO")
+
+ }